from langchain_community.document_loaders import PyPDFLoader

# Load the PDF and let the loader split it into a list of Document objects.
loader = PyPDFLoader("./llama2.pdf")
pages = loader.load_and_split()

# Preview one entry. The label ("第N页" = "page N") is built from the same
# index used for the lookup so the two cannot drift apart; previously the
# code printed pages[10] under a "page 0" label, which was misleading and
# raised IndexError whenever fewer than 11 entries were returned.
preview_index = 0
print(f"第{preview_index}页：\n{pages[preview_index].page_content}")

from langchain.text_splitter import RecursiveCharacterTextSplitter
# Split the text of the first loaded entry into small, overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,        # upper bound on chunk length, as measured by length_function
    chunk_overlap=100,     # amount of text shared between consecutive chunks
    length_function=len,   # measure length in characters
    add_start_index=True,  # record each chunk's start offset in its metadata
)

# The splitter method is create_documents (plural) and takes a list of raw
# strings; the singular name used before does not exist on the splitter and
# raised AttributeError.
paragraphs = text_splitter.create_documents([pages[0].page_content])
for para in paragraphs:
    print(para.page_content)
    print('-------')